pacman::p_load(tmap, sf, DT, stplanr, sp, dplyr,
performance, reshape2, units,
ggpubr, tidyverse, mapview, httr, sfheaders, knitr, kableExtra)take home exercise 2 Singapore public bus commuter flows
#overview We currently have data on people’s movement patterns as well as information on schools, businesses, and retail activities in different areas. The goal of this task is to identify common Saturday activities among the population. Through our analysis, we aim to provide recommendations to the government, suggesting potential enhancements to existing facilities or proposing the development of new amenities that align with people’s preferences. The objective is to make these facilities more appealing and strategically located for the community’s convenience.
#Objective Our goal is to pinpoint popular weekend destinations, analyze the main facilities in those areas, and provide recommendations accordingly.
#Data Geospatial data: Passenger Volume by Origin Destination Bus Stops, Bus Stop Location, Train Station and Train Station Exit Point, Master Plan 2019 Subzone Boundary, HDB Property Information, Business, Entertn, F&B, FinServ, Leisure&Recreation and Retails. Aspatial data: HDB Property Information. This data is for us to use.
#Import the data
#Importing the OD data
odbus <- read_csv("data/aspatial/origin_destination_bus_202310.csv")Rows: 5694297 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): YEAR_MONTH, DAY_TYPE, PT_TYPE, ORIGIN_PT_CODE, DESTINATION_PT_CODE
dbl (2): TIME_PER_HOUR, TOTAL_TRIPS
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
odbus$ORIGIN_PT_CODE <- as.factor(odbus$ORIGIN_PT_CODE)
odbus$DESTINATION_PT_CODE <- as.factor(odbus$DESTINATION_PT_CODE) glimpse(odbus)Rows: 5,694,297
Columns: 7
$ YEAR_MONTH <chr> "2023-10", "2023-10", "2023-10", "2023-10", "2023-…
$ DAY_TYPE <chr> "WEEKENDS/HOLIDAY", "WEEKDAY", "WEEKENDS/HOLIDAY",…
$ TIME_PER_HOUR <dbl> 16, 16, 14, 14, 17, 17, 17, 7, 14, 14, 10, 20, 20,…
$ PT_TYPE <chr> "BUS", "BUS", "BUS", "BUS", "BUS", "BUS", "BUS", "…
$ ORIGIN_PT_CODE <fct> 04168, 04168, 80119, 80119, 44069, 20281, 20281, 1…
$ DESTINATION_PT_CODE <fct> 10051, 10051, 90079, 90079, 17229, 20141, 20141, 1…
$ TOTAL_TRIPS <dbl> 3, 5, 3, 5, 4, 1, 24, 2, 1, 7, 3, 2, 5, 1, 1, 1, 1…
weekendmorning11_14 <- odbus %>%
filter(DAY_TYPE == "WEEKENDS/HOLIDAY") %>%
filter(TIME_PER_HOUR >= 11 & TIME_PER_HOUR <= 14) %>%
group_by(ORIGIN_PT_CODE, DESTINATION_PT_CODE) %>%
summarise(TRIPS = sum(TOTAL_TRIPS), .groups = 'keep')datatable(weekendmorning11_14)Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html
write_rds(weekendmorning11_14, "data/rds/weekendmorning11_14.rds")weekendmorning11_14 <- read_rds("data/rds/weekendmorning11_14.rds")#Working with Geospatial Data
busstop <- st_read(dsn = "data/geospatial", layer = "BusStop") %>%
st_transform(crs = 3414)Reading layer `BusStop' from data source
`/Users/linxu/ISSS624/take home exercise 2/data/geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 5161 features and 3 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 3970.122 ymin: 26482.1 xmax: 48284.56 ymax: 52983.82
Projected CRS: SVY21
glimpse(busstop)Rows: 5,161
Columns: 4
$ BUS_STOP_N <chr> "22069", "32071", "44331", "96081", "11561", "66191", "2338…
$ BUS_ROOF_N <chr> "B06", "B23", "B01", "B05", "B05", "B03", "B02A", "B02", "B…
$ LOC_DESC <chr> "OPP CEVA LOGISTICS", "AFT TRACK 13", "BLK 239", "GRACE IND…
$ geometry <POINT [m]> POINT (13576.31 32883.65), POINT (13228.59 44206.38),…
busstop_points = busstop %>%
st_as_sf(coords = c("geometry"), crs = 3414, remove = FALSE)mapview_busstop_points = mapview(busstop_points, cex = 0.5, alpha = .5, popup = NULL)
mapview_busstop_pointsarea_honeycomb_grid = st_make_grid(busstop_points, c(375, 375), what = "polygons", square = FALSE)
honeycomb_grid_sf = st_sf(area_honeycomb_grid) %>%
mutate(grid_id = 1:length(lengths(area_honeycomb_grid)))
honeycomb_grid_sf$n_colli = lengths(st_intersects(honeycomb_grid_sf, busstop_points))
honeycomb_count = filter(honeycomb_grid_sf, n_colli > 0)map_honeycomb = tm_shape(honeycomb_count) +
tm_fill(
col = "n_colli",
palette = "Reds",
style = "cont",
title = "Number of collisions",
id = "grid_id",
showNA = FALSE,
alpha = 0.6,
popup.vars = c(
"Number of collisions: " = "n_colli"
),
popup.format = list(
n_colli = list(format = "f", digits = 0)
)
) +
tm_borders(col = "grey40", lwd = 0.7)
map_honeycomb
tmap_mode("plot")tmap mode set to plotting
busstop_honeycomb_count <- st_intersection(busstop, honeycomb_count) %>%
select(BUS_STOP_N, grid_id) %>%
st_drop_geometry()Warning: attribute variables are assumed to be spatially constant throughout
all geometries
glimpse(busstop_honeycomb_count)Rows: 5,162
Columns: 2
$ BUS_STOP_N <chr> "25059", "25059", "25751", "26379", "26369", "25761", "2638…
$ grid_id <int> 3, 86, 170, 173, 174, 211, 214, 255, 255, 258, 295, 296, 29…
write_rds(busstop_honeycomb_count, "data/rds/busstop_honeycomb_count.rds") weekendmorning11_14 <- left_join(weekendmorning11_14 , busstop_honeycomb_count,
by = c("ORIGIN_PT_CODE" = "BUS_STOP_N")) %>%
rename(ORIGIN_BS = ORIGIN_PT_CODE,
ORIGIN_SZ = grid_id,
DESTIN_BS = DESTINATION_PT_CODE)Warning in left_join(weekendmorning11_14, busstop_honeycomb_count, by = c(ORIGIN_PT_CODE = "BUS_STOP_N")): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 27736 of `x` matches multiple rows in `y`.
ℹ Row 3165 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
duplicate <- weekendmorning11_14 %>%
group_by_all() %>%
filter(n()>1) %>%
ungroup()weekendmorning11_14 <- unique(weekendmorning11_14)weekendmorning11_14 <- left_join(weekendmorning11_14 , busstop_honeycomb_count,
by = c("DESTIN_BS" = "BUS_STOP_N")) Warning in left_join(weekendmorning11_14, busstop_honeycomb_count, by = c(DESTIN_BS = "BUS_STOP_N")): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 206 of `x` matches multiple rows in `y`.
ℹ Row 3203 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
duplicate <- weekendmorning11_14 %>%
group_by_all() %>%
filter(n()>1) %>%
ungroup()weekendmorning11_14 <- unique(weekendmorning11_14)weekendmorning11_14 <- weekendmorning11_14 %>%
rename(DESTIN_SZ = grid_id) %>%
drop_na() %>%
group_by(ORIGIN_SZ, DESTIN_SZ) %>%
summarise(WEEKENDDAYMORNING_PEAK = sum(TRIPS))`summarise()` has grouped output by 'ORIGIN_SZ'. You can override using the
`.groups` argument.
write_rds(weekendmorning11_14, "data/rds/weekendmorning11_14.rds")weekendmorning11_14 <- read_rds("data/rds/weekendmorning11_14.rds")weekendmorning11_14_1 <- weekendmorning11_14[weekendmorning11_14$ORIGIN_SZ!=weekendmorning11_14$DESTIN_SZ,]flowLine <- od2line(flow = weekendmorning11_14_1,
zones = honeycomb_count,
zone_code = "grid_id")Creating centroids representing desire line start and end points.
tm_shape(honeycomb_count) +
tm_polygons() +
flowLine %>%
tm_shape() +
tm_lines(lwd = "WEEKENDDAYMORNING_PEAK",
style = "quantile",
scale = c(0.1, 1, 3, 5, 7, 10),
n = 6,
alpha = 0.3)Warning in g$scale * (w_legend/maxW): longer object length is not a multiple of
shorter object length
Warning in g$scale * (x/maxW): longer object length is not a multiple of
shorter object length

tm_shape(honeycomb_count) +
tm_polygons() +
flowLine %>%
filter(WEEKENDDAYMORNING_PEAK >= 2000) %>%
tm_shape() +
tm_lines(lwd = "WEEKENDDAYMORNING_PEAK",
style = "quantile",
scale = c(0.1, 1, 3, 5, 7, 10),
n = 6,
alpha = 0.3)Warning in g$scale * (w_legend/maxW): longer object length is not a multiple of
shorter object length
Warning in g$scale * (x/maxW): longer object length is not a multiple of
shorter object length

Caculate the distance
honeycomb_count <- read_rds("data/rds/honeycomb_count.rds")
honeycomb_countSimple feature collection with 2176 features and 2 fields
Geometry type: POLYGON
Dimension: XY
Bounding box: xmin: 3595.122 ymin: 26265.59 xmax: 48595.12 ymax: 53328.89
Projected CRS: SVY21 / Singapore TM
First 10 features:
area_honeycomb_grid grid_id n_colli
1 POLYGON ((3782.622 27889.39... 3 1
2 POLYGON ((4157.622 27889.39... 86 1
3 POLYGON ((4532.622 28538.91... 170 1
4 POLYGON ((4532.622 30487.47... 173 1
5 POLYGON ((4532.622 31136.99... 174 1
6 POLYGON ((4720.122 28214.15... 211 1
7 POLYGON ((4720.122 30162.71... 214 1
8 POLYGON ((4907.622 29837.95... 255 2
9 POLYGON ((4907.622 31786.51... 258 1
10 POLYGON ((5095.122 28863.67... 295 1
honeycomb_count_sp <- as(honeycomb_count, "Spatial")
honeycomb_count_spclass : SpatialPolygonsDataFrame
features : 2176
extent : 3595.122, 48595.12, 26265.59, 53328.89 (xmin, xmax, ymin, ymax)
crs : +proj=tmerc +lat_0=1.36666666666667 +lon_0=103.833333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +ellps=WGS84 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
variables : 2
names : grid_id, n_colli
min values : 3, 1
max values : 9888, 10
dist <- spDists(honeycomb_count_sp,
longlat = FALSE)
head(dist, n=c(10, 10)) [,1] [,2] [,3] [,4] [,5] [,6] [,7]
[1,] 0.0000 375.0000 992.1567 2704.1635 3333.0729 992.1567 2459.0394
[2,] 375.0000 0.0000 750.0000 2625.0000 3269.1742 649.5191 2341.8742
[3,] 992.1567 750.0000 0.0000 1948.5572 2598.0762 375.0000 1634.5871
[4,] 2704.1635 2625.0000 1948.5572 0.0000 649.5191 2281.0359 375.0000
[5,] 3333.0729 3269.1742 2598.0762 649.5191 0.0000 2928.8436 992.1567
[6,] 992.1567 649.5191 375.0000 2281.0359 2928.8436 0.0000 1948.5572
[7,] 2459.0394 2341.8742 1634.5871 375.0000 992.1567 1948.5572 0.0000
[8,] 2250.0000 2087.9116 1352.0817 750.0000 1352.0817 1634.5871 375.0000
[9,] 4056.2452 3968.6270 3269.1742 1352.0817 750.0000 3577.2720 1634.5871
[10,] 1634.5871 1352.0817 649.5191 1718.4659 2341.8742 750.0000 1352.0817
[,8] [,9] [,10]
[1,] 2250.0000 4056.245 1634.5871
[2,] 2087.9116 3968.627 1352.0817
[3,] 1352.0817 3269.174 649.5191
[4,] 750.0000 1352.082 1718.4659
[5,] 1352.0817 750.000 2341.8742
[6,] 1634.5871 3577.272 750.0000
[7,] 375.0000 1634.587 1352.0817
[8,] 0.0000 1948.557 992.1567
[9,] 1948.5572 0.000 2928.8436
[10,] 992.1567 2928.844 0.0000
sz_names <- honeycomb_count$grid_idcolnames(dist) <- paste0(sz_names)
rownames(dist) <- paste0(sz_names)distPair <- melt(dist) %>%
rename(dist = value)
head(distPair, 10) Var1 Var2 dist
1 3 3 0.0000
2 86 3 375.0000
3 170 3 992.1567
4 173 3 2704.1635
5 174 3 3333.0729
6 211 3 992.1567
7 214 3 2459.0394
8 255 3 2250.0000
9 258 3 4056.2452
10 295 3 1634.5871
distPair %>%
filter(dist > 0) %>%
summary() Var1 Var2 dist
Min. : 3 Min. : 3 Min. : 375
1st Qu.:3470 1st Qu.:3470 1st Qu.: 7902
Median :5300 Median :5300 Median :12898
Mean :5037 Mean :5037 Mean :13632
3rd Qu.:6656 3rd Qu.:6656 3rd Qu.:18283
Max. :9888 Max. :9888 Max. :44926
distPair$dist <- ifelse(distPair$dist == 0,
50, distPair$dist)distPair %>%
summary() Var1 Var2 dist
Min. : 3 Min. : 3 Min. : 50
1st Qu.:3470 1st Qu.:3470 1st Qu.: 7902
Median :5300 Median :5300 Median :12898
Mean :5037 Mean :5037 Mean :13625
3rd Qu.:6656 3rd Qu.:6656 3rd Qu.:18283
Max. :9888 Max. :9888 Max. :44926
distPair <- distPair %>%
rename(orig = Var1,
dest = Var2)write_rds(distPair, "data/rds/distPair.rds") weekendmorning11_14 <- read_rds("data/rds/weekendmorning11_14.rds")flow_data <- weekendmorning11_14 %>%
group_by(ORIGIN_SZ, DESTIN_SZ) %>%
summarize(TRIPS = sum(WEEKENDDAYMORNING_PEAK)) `summarise()` has grouped output by 'ORIGIN_SZ'. You can override using the
`.groups` argument.
head(flow_data)# A tibble: 6 × 3
# Groups: ORIGIN_SZ [2]
ORIGIN_SZ DESTIN_SZ TRIPS
<int> <int> <dbl>
1 170 377 4
2 170 462 2
3 170 594 20
4 173 301 1
5 173 342 5
6 173 554 3
flow_data$FlowNoIntra <- ifelse(
flow_data$ORIGIN_SZ == flow_data$DESTIN_SZ,
0, flow_data$TRIPS)
flow_data$offset <- ifelse(
flow_data$ORIGIN_SZ == flow_data$DESTIN_SZ,
0.000001, 1)flow_data$ORIGIN_SZ <- as.integer(flow_data$ORIGIN_SZ)
flow_data$DESTIN_SZ <- as.integer(flow_data$DESTIN_SZ)flow_data1 <- flow_data %>%
left_join (distPair,
by = c("ORIGIN_SZ" = "orig",
"DESTIN_SZ" = "dest"))flow_data1 <- flow_data1 %>%
left_join(honeycomb_count,
by = c("DESTIN_SZ" = "grid_id")) %>%
rename(DIST = dist)summary(flow_data1) ORIGIN_SZ DESTIN_SZ TRIPS FlowNoIntra
Min. : 170 Min. : 170 Min. : 1.00 Min. : 0.00
1st Qu.:4569 1st Qu.:4573 1st Qu.: 2.00 1st Qu.: 2.00
Median :5699 Median :5698 Median : 8.00 Median : 7.00
Mean :5623 Mean :5616 Mean : 46.69 Mean : 46.51
3rd Qu.:6775 3rd Qu.:6743 3rd Qu.: 26.00 3rd Qu.: 26.00
Max. :9888 Max. :9888 Max. :43417.00 Max. :43417.00
offset DIST area_honeycomb_grid n_colli
Min. :0.000001 Min. : 50 POLYGON :161094 Min. : 1.00
1st Qu.:1.000000 1st Qu.: 2088 epsg:3414 : 0 1st Qu.: 2.00
Median :1.000000 Median : 4226 +proj=tmer...: 0 Median : 3.00
Mean :0.995934 Mean : 5213 Mean : 2.89
3rd Qu.:1.000000 3rd Qu.: 7377 3rd Qu.: 4.00
Max. :1.000000 Max. :24827 Max. :10.00
write_rds(flow_data1,
"data/rds/flow_data1.rds")Make flow data with weekend activies
url<-"https://www.onemap.gov.sg/api/common/elastic/search"
csv<-read_csv("data/aspatial/Generalinformationofschools.csv")Rows: 346 Columns: 31
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (31): school_name, url_address, address, postal_code, telephone_no, tele...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
postcodes<-csv$`postal_code`
found<-data.frame()
not_found<-data.frame()
for(postcode in postcodes){
query<-list('searchVal'=postcode,'returnGeom'='Y','getAddrDetails'='Y','pageNum'='1')
res<- GET(url,query=query)
if((content(res)$found)!=0){
found<-rbind(found,data.frame(content(res))[4:13])
} else{
not_found = data.frame(postcode)
}
}merged = merge(csv, found, by.x = 'postal_code', by.y = 'results.POSTAL', all = TRUE)
write.csv(merged, file = "data/aspatial/schools.csv")
write.csv(not_found, file = "data/aspatial/not_found.csv")schools <- read_csv("data/aspatial/schools.csv") %>%
rename(latitude = "results.LATITUDE",
longitude = "results.LONGITUDE")%>%
select(postal_code, school_name, latitude, longitude)New names:
Rows: 350 Columns: 41
── Column specification
──────────────────────────────────────────────────────── Delimiter: "," chr
(36): postal_code, school_name, url_address, address, telephone_no, tele... dbl
(5): ...1, results.X, results.Y, results.LATITUDE, results.LONGITUDE
ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
Specify the column types or set `show_col_types = FALSE` to quiet this message.
• `` -> `...1`
schools <- schools[complete.cases(schools$longitude, schools$latitude), ]
schools_sf <- st_as_sf(schools,
coords = c("longitude", "latitude"),
crs=4326) %>%
st_transform(crs = 3414)tmap_options(check.and.fix = TRUE)
tm_shape(honeycomb_count) +
tm_polygons() +
tm_shape(schools_sf) +
tm_dots()
tmap_mode("plot")tmap mode set to plotting
honeycomb_count$`SCHOOL_COUNT`<- lengths(
st_intersects(
honeycomb_count, schools_sf))summary(honeycomb_count$SCHOOL_COUNT) Min. 1st Qu. Median Mean 3rd Qu. Max.
0.0000 0.0000 0.0000 0.1333 0.0000 4.0000
##公交站转换
RTSS_sf <- st_read(dsn = "data/geospatial/",
layer = "RapidTransitSystemStation")%>%
st_transform(crs = 3414)Reading layer `RapidTransitSystemStation' from data source
`/Users/linxu/ISSS624/take home exercise 2/data/geospatial'
using driver `ESRI Shapefile'
Warning in CPL_read_ogr(dsn, layer, query, as.character(options), quiet, : GDAL
Message 1: Non closed ring detected. To avoid accepting it, set the
OGR_GEOMETRY_ACCEPT_UNCLOSED_RING configuration option to NO
Simple feature collection with 220 features and 4 fields
Geometry type: POLYGON
Dimension: XY
Bounding box: xmin: 6068.209 ymin: 27478.44 xmax: 45377.5 ymax: 47913.58
Projected CRS: SVY21
RTSS_sf_polygons <- RTSS_sf[st_is_valid(RTSS_sf), ]
RTSS_sf_points <- st_centroid(RTSS_sf_polygons)Warning: st_centroid assumes attributes are constant over geometries
tmap_options(check.and.fix = TRUE)
tm_shape(honeycomb_count) +
tm_polygons() +
tm_shape(RTSS_sf_points) +
tm_dots()Warning: The shape RTSS_sf_points contains empty units.

tmap_mode("plot")tmap mode set to plotting
honeycomb_count$`RTSS_COUNT`<- lengths(
st_intersects(
honeycomb_count, RTSS_sf_points))summary(honeycomb_count$`RTSS_COUNT`) Min. 1st Qu. Median Mean 3rd Qu. Max.
0.00000 0.00000 0.00000 0.08823 0.00000 4.00000
entertn
entertn_sf <- st_read(dsn = "data/geospatial/",
layer = "entertn")%>%
st_transform(crs = 3414)Reading layer `entertn' from data source
`/Users/linxu/ISSS624/take home exercise 2/data/geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 114 features and 3 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 10809.34 ymin: 26528.63 xmax: 41600.62 ymax: 46375.77
Projected CRS: SVY21 / Singapore TM
tmap_options(check.and.fix = TRUE)
tm_shape(honeycomb_count) +
tm_polygons() +
tm_shape(entertn_sf) +
tm_dots()
tmap_mode("plot")tmap mode set to plotting
honeycomb_count$`ENTERTN_COUNT`<- lengths(
st_intersects(
honeycomb_count, entertn_sf))summary(honeycomb_count$ENTERTN_COUNT) Min. 1st Qu. Median Mean 3rd Qu. Max.
0.0000 0.0000 0.0000 0.0455 0.0000 7.0000
Liesure&Recreation
lr_sf <- st_read(dsn = "data/geospatial/",
layer = "Liesure&Recreation") %>%
st_transform(crs = 3414)Reading layer `Liesure&Recreation' from data source
`/Users/linxu/ISSS624/take home exercise 2/data/geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 1217 features and 30 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 6010.495 ymin: 25134.28 xmax: 48439.77 ymax: 50078.88
Projected CRS: SVY21 / Singapore TM
tmap_options(check.and.fix = TRUE)
tm_shape(honeycomb_count) +
tm_polygons() +
tm_shape(lr_sf) +
tm_dots()
tmap_mode("plot")tmap mode set to plotting
honeycomb_count$`LR_COUNT`<- lengths(
st_intersects(
honeycomb_count, lr_sf))summary(honeycomb_count$LR_COUNT) Min. 1st Qu. Median Mean 3rd Qu. Max.
0.0000 0.0000 0.0000 0.4007 0.0000 23.0000
##Retails
retails_sf <- st_read(dsn = "data/geospatial/",
layer = "Retails")%>%
st_transform(crs = 3414)Reading layer `Retails' from data source
`/Users/linxu/ISSS624/take home exercise 2/data/geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 37635 features and 3 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 4737.982 ymin: 25171.88 xmax: 48265.04 ymax: 50135.28
Projected CRS: SVY21 / Singapore TM
tmap_options(check.and.fix = TRUE)
tm_shape(honeycomb_count) +
tm_polygons() +
tm_shape(retails_sf) +
tm_dots()
tmap_mode("plot")tmap mode set to plotting
honeycomb_count$`RETAILS_COUNT`<- lengths(
st_intersects(
honeycomb_count, retails_sf))summary(honeycomb_count$RETAILS_COUNT) Min. 1st Qu. Median Mean 3rd Qu. Max.
0.00 0.00 2.00 15.42 9.00 987.00
#数据整合
honeycomb_count_tidy <- honeycomb_count %>%
st_drop_geometry() %>%
select(grid_id, SCHOOL_COUNT, RTSS_COUNT, ENTERTN_COUNT, LR_COUNT, RETAILS_COUNT)flow_data1 <- flow_data1 %>%
left_join(honeycomb_count_tidy,
by = c("DESTIN_SZ" = "grid_id")) summary(flow_data1) ORIGIN_SZ DESTIN_SZ TRIPS FlowNoIntra
Min. : 170 Min. : 170 Min. : 1.00 Min. : 0.00
1st Qu.:4569 1st Qu.:4573 1st Qu.: 2.00 1st Qu.: 2.00
Median :5699 Median :5698 Median : 8.00 Median : 7.00
Mean :5623 Mean :5616 Mean : 46.69 Mean : 46.51
3rd Qu.:6775 3rd Qu.:6743 3rd Qu.: 26.00 3rd Qu.: 26.00
Max. :9888 Max. :9888 Max. :43417.00 Max. :43417.00
offset DIST area_honeycomb_grid n_colli
Min. :0.000001 Min. : 50 POLYGON :161094 Min. : 1.00
1st Qu.:1.000000 1st Qu.: 2088 epsg:3414 : 0 1st Qu.: 2.00
Median :1.000000 Median : 4226 +proj=tmer...: 0 Median : 3.00
Mean :0.995934 Mean : 5213 Mean : 2.89
3rd Qu.:1.000000 3rd Qu.: 7377 3rd Qu.: 4.00
Max. :1.000000 Max. :24827 Max. :10.00
SCHOOL_COUNT RTSS_COUNT ENTERTN_COUNT LR_COUNT
Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. : 0.0000
1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 0.0000
Median :0.0000 Median :0.0000 Median :0.0000 Median : 0.0000
Mean :0.1399 Mean :0.1862 Mean :0.1138 Mean : 0.7081
3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.: 1.0000
Max. :4.0000 Max. :4.0000 Max. :7.0000 Max. :23.0000
RETAILS_COUNT
Min. : 0.00
1st Qu.: 1.00
Median : 6.00
Mean : 35.52
3rd Qu.: 26.00
Max. :987.00
flow_data1$SCHOOL_COUNT <- ifelse(
flow_data1$SCHOOL_COUNT == 0,
0.99, flow_data1$SCHOOL_COUNT)
flow_data1$RTSS_COUNT <- ifelse(
flow_data1$RTSS_COUNT == 0,
0.99, flow_data1$RTSS_COUNT)
flow_data1$ENTERTN_COUNT <- ifelse(
flow_data1$ENTERTN_COUNT == 0,
0.99, flow_data1$ENTERTN_COUNT)
flow_data1$LR_COUNT <- ifelse(
flow_data1$LR_COUNT == 0,
0.99, flow_data1$LR_COUNT)
flow_data1$RETAILS_COUNT <- ifelse(
flow_data1$RETAILS_COUNT == 0,
0.99, flow_data1$RETAILS_COUNT)summary(flow_data1) ORIGIN_SZ DESTIN_SZ TRIPS FlowNoIntra
Min. : 170 Min. : 170 Min. : 1.00 Min. : 0.00
1st Qu.:4569 1st Qu.:4573 1st Qu.: 2.00 1st Qu.: 2.00
Median :5699 Median :5698 Median : 8.00 Median : 7.00
Mean :5623 Mean :5616 Mean : 46.69 Mean : 46.51
3rd Qu.:6775 3rd Qu.:6743 3rd Qu.: 26.00 3rd Qu.: 26.00
Max. :9888 Max. :9888 Max. :43417.00 Max. :43417.00
offset DIST area_honeycomb_grid n_colli
Min. :0.000001 Min. : 50 POLYGON :161094 Min. : 1.00
1st Qu.:1.000000 1st Qu.: 2088 epsg:3414 : 0 1st Qu.: 2.00
Median :1.000000 Median : 4226 +proj=tmer...: 0 Median : 3.00
Mean :0.995934 Mean : 5213 Mean : 2.89
3rd Qu.:1.000000 3rd Qu.: 7377 3rd Qu.: 4.00
Max. :1.000000 Max. :24827 Max. :10.00
SCHOOL_COUNT RTSS_COUNT ENTERTN_COUNT LR_COUNT
Min. :0.990 Min. :0.990 Min. :0.990 Min. : 0.990
1st Qu.:0.990 1st Qu.:0.990 1st Qu.:0.990 1st Qu.: 0.990
Median :0.990 Median :0.990 Median :0.990 Median : 0.990
Mean :1.007 Mean :1.021 Mean :1.035 Mean : 1.407
3rd Qu.:0.990 3rd Qu.:0.990 3rd Qu.:0.990 3rd Qu.: 1.000
Max. :4.000 Max. :4.000 Max. :7.000 Max. :23.000
RETAILS_COUNT
Min. : 0.99
1st Qu.: 1.00
Median : 6.00
Mean : 35.70
3rd Qu.: 26.00
Max. :987.00
write_rds(flow_data1,
"data/rds/flow_data_tidy.rds")##SIM
flow_data2 <- read_rds("data/rds/flow_data_tidy.rds")flow_data2 <- flow_data2 %>%
mutate(ORIGIN_SZ = as.character(ORIGIN_SZ), DESTIN_SZ = as.character(DESTIN_SZ))glimpse(flow_data2)Rows: 161,094
Columns: 13
Groups: ORIGIN_SZ [2,137]
$ ORIGIN_SZ <chr> "170", "170", "170", "173", "173", "173", "173", "…
$ DESTIN_SZ <chr> "377", "462", "594", "301", "342", "554", "555", "…
$ TRIPS <dbl> 4, 2, 20, 1, 5, 3, 31, 12, 1, 1, 9, 42, 23, 4, 14,…
$ FlowNoIntra <dbl> 4, 2, 20, 1, 5, 3, 31, 12, 1, 1, 9, 42, 23, 4, 14,…
$ offset <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ DIST <dbl> 992.1567, 1634.5871, 6139.0146, 2341.8742, 2087.91…
$ area_honeycomb_grid <POLYGON [m]> POLYGON ((5470.122 28214.15..., POLYGON ((…
$ n_colli <int> 1, 1, 5, 2, 2, 4, 4, 6, 1, 1, 4, 4, 6, 2, 2, 1, 1,…
$ SCHOOL_COUNT <dbl> 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.…
$ RTSS_COUNT <dbl> 0.99, 0.99, 1.00, 0.99, 0.99, 0.99, 0.99, 1.00, 0.…
$ ENTERTN_COUNT <dbl> 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.…
$ LR_COUNT <dbl> 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.…
$ RETAILS_COUNT <dbl> 0.99, 0.99, 2.00, 0.99, 0.99, 0.99, 0.99, 9.00, 1.…
kable(head(flow_data2[, 1:5], n = 5))| ORIGIN_SZ | DESTIN_SZ | TRIPS | FlowNoIntra | offset |
|---|---|---|---|---|
| 170 | 377 | 4 | 4 | 1 |
| 170 | 462 | 2 | 2 | 1 |
| 170 | 594 | 20 | 20 | 1 |
| 173 | 301 | 1 | 1 | 1 |
| 173 | 342 | 5 | 5 | 1 |
flow_data2$FlowNoIntra <- ifelse(
flow_data2$ORIGIN_SZ == flow_data2$DESTIN_SZ,
0, flow_data2$TRIPS)
flow_data2$offset <- ifelse(
flow_data2$ORIGIN_SZ == flow_data2$DESTIN_SZ,
0.000001, 1)inter_zonal_flow <- flow_data2 %>%
filter(FlowNoIntra > 0)